Grabbing SPINS gradients

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Warning: package 'ggseg' was built under R version 4.1.3
## Loading required package: prettyGraphs
## Loading required package: ExPosition
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## Attaching package: 'colorspace'
## The following object is masked from 'package:PTCA4CATA':
## 
##     lighten

Read in the SPINS big table

## New names:
## * `` -> ...1
## Rows: 337120 Columns: 16
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (5): ROI, Task, Subject, Network, Site
## dbl (11): ...1, grad1, grad2, grad3, grad4, grad5, grad6, grad7, grad8, grad...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## * `` -> ...1
## Rows: 337120 Columns: 16
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (5): ROI, Task, Subject, Network, Site
## dbl (11): ...1, grad1, grad2, grad3, grad4, grad5, grad6, grad7, grad8, grad...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

read subject data

## Rows: 354 Columns: 38
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (7): record_id, site, scanner, diagnostic_group, demo_sex, subject, ses...
## dbl (22): demo_age_study_entry, scog_tasit1_total, scog_tasit2_total, scog_t...
## lgl  (9): term_early_withdraw, has_RS_grads, has_EA_grads, RS_QC_exclude, EA...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
##  [1] "record_id"                        "site"                            
##  [3] "scanner"                          "diagnostic_group"                
##  [5] "demo_sex"                         "demo_age_study_entry"            
##  [7] "term_early_withdraw"              "scog_tasit1_total"               
##  [9] "scog_tasit2_total"                "scog_tasit3_total"               
## [11] "np_composite_tscore"              "np_domain_tscore_process_speed"  
## [13] "np_domain_tscore_att_vigilance"   "np_domain_tscore_work_mem"       
## [15] "np_domain_tscore_verbal_learning" "np_domain_tscore_visual_learning"
## [17] "np_domain_tscore_reasoning_ps"    "np_domain_tscore_social_cog"     
## [19] "bprs_factor_pos_symp"             "bprs_factor_total"               
## [21] "bprs_factor_neg_symp"             "qls_total"                       
## [23] "sans_total_sc"                    "bsfs_total"                      
## [25] "subject"                          "session"                         
## [27] "n_scans_task-emp"                 "n_scans_task-rest"               
## [29] "fd_mean_task-emp"                 "fd_mean_task-rest"               
## [31] "has_RS_grads"                     "has_EA_grads"                    
## [33] "RS_QC_exclude"                    "EA_QC_exclude"                   
## [35] "RS_mo_exclude"                    "EA_mo_exclude"                   
## [37] "withdrawn_exclude"                "exclude_MRI"
##  [1] "scog_tasit1_total"                "scog_tasit2_total"               
##  [3] "scog_tasit3_total"                "np_composite_tscore"             
##  [5] "np_domain_tscore_process_speed"   "np_domain_tscore_att_vigilance"  
##  [7] "np_domain_tscore_work_mem"        "np_domain_tscore_verbal_learning"
##  [9] "np_domain_tscore_visual_learning" "np_domain_tscore_reasoning_ps"   
## [11] "np_domain_tscore_social_cog"      "bprs_factor_pos_symp"            
## [13] "bprs_factor_total"                "bprs_factor_neg_symp"            
## [15] "qls_total"                        "sans_total_sc"                   
## [17] "bsfs_total"

Check subject overlap

## sorted subject IDs from each table
grad.sub <- spins_grads_wide$Subject[order(spins_grads_wide$Subject)] # everyone 430
behav.sub <- spins_dx$subject[order(spins_dx$subject)] # 193 cases

## subjects with behavioural data but no gradients
## (was `behav.sub %in% grad.sub == FALSE`; negate the membership test directly)
behav.sub[!(behav.sub %in% grad.sub)]
## [1] "sub-CMP0180" "sub-CMP0182" "sub-CMP0196" "sub-CMP0198" "sub-CMP0213"
## [6] "sub-ZHH0034"
## subjects with gradients but no behavioural data
grad.sub[!(grad.sub %in% behav.sub)]
##   [1] "sub-CMH0004" "sub-CMH0005" "sub-CMH0007" "sub-CMH0008" "sub-CMH0009"
##   [6] "sub-CMH0011" "sub-CMH0014" "sub-CMH0015" "sub-CMH0016" "sub-CMH0017"
##  [11] "sub-CMH0020" "sub-CMH0023" "sub-CMH0026" "sub-CMH0028" "sub-CMH0030"
##  [16] "sub-CMH0031" "sub-CMH0036" "sub-CMH0038" "sub-CMH0040" "sub-CMH0046"
##  [21] "sub-CMH0047" "sub-CMH0048" "sub-CMH0050" "sub-CMH0052" "sub-CMH0054"
##  [26] "sub-CMH0055" "sub-CMH0059" "sub-CMH0065" "sub-CMH0066" "sub-CMH0069"
##  [31] "sub-CMH0070" "sub-CMH0072" "sub-CMH0073" "sub-CMH0080" "sub-CMH0086"
##  [36] "sub-CMH0091" "sub-CMH0093" "sub-CMH0097" "sub-CMH0104" "sub-CMH0109"
##  [41] "sub-CMH0113" "sub-CMH0114" "sub-CMH0118" "sub-CMH0119" "sub-CMH0120"
##  [46] "sub-CMH0123" "sub-CMH0125" "sub-CMH0126" "sub-CMH0134" "sub-CMH0136"
##  [51] "sub-CMH0144" "sub-CMH0148" "sub-CMH0149" "sub-CMH0152" "sub-CMH0155"
##  [56] "sub-CMH0158" "sub-CMH0159" "sub-CMH0162" "sub-CMH0171" "sub-CMH0176"
##  [61] "sub-CMH0194" "sub-CMH0197" "sub-CMP0175" "sub-CMP0178" "sub-CMP0183"
##  [66] "sub-CMP0185" "sub-CMP0187" "sub-CMP0188" "sub-CMP0190" "sub-CMP0193"
##  [71] "sub-CMP0199" "sub-CMP0202" "sub-CMP0206" "sub-CMP0209" "sub-CMP0216"
##  [76] "sub-CMP0217" "sub-CMP0218" "sub-CMP0219" "sub-MRC0001" "sub-MRC0008"
##  [81] "sub-MRC0010" "sub-MRC0012" "sub-MRC0013" "sub-MRC0016" "sub-MRC0017"
##  [86] "sub-MRC0020" "sub-MRC0021" "sub-MRC0023" "sub-MRC0024" "sub-MRC0026"
##  [91] "sub-MRC0027" "sub-MRC0028" "sub-MRC0029" "sub-MRC0030" "sub-MRC0033"
##  [96] "sub-MRC0036" "sub-MRC0037" "sub-MRC0042" "sub-MRC0043" "sub-MRC0044"
## [101] "sub-MRC0046" "sub-MRC0047" "sub-MRC0051" "sub-MRC0054" "sub-MRC0057"
## [106] "sub-MRC0058" "sub-MRC0060" "sub-MRC0064" "sub-MRC0065" "sub-MRC0066"
## [111] "sub-MRC0068" "sub-MRC0069" "sub-MRC0070" "sub-MRC0071" "sub-MRC0074"
## [116] "sub-MRP0019" "sub-MRP0067" "sub-MRP0076" "sub-MRP0079" "sub-MRP0081"
## [121] "sub-MRP0082" "sub-MRP0084" "sub-MRP0085" "sub-MRP0088" "sub-MRP0093"
## [126] "sub-MRP0095" "sub-MRP0099" "sub-MRP0100" "sub-MRP0101" "sub-MRP0102"
## [131] "sub-MRP0105" "sub-MRP0107" "sub-MRP0109" "sub-MRP0115" "sub-MRP0116"
## [136] "sub-MRP0120" "sub-MRP0121" "sub-MRP0122" "sub-MRP0123" "sub-MRP0125"
## [141] "sub-MRP0127" "sub-MRP0128" "sub-MRP0132" "sub-MRP0135" "sub-MRP0136"
## [146] "sub-MRP0137" "sub-MRP0139" "sub-MRP0143" "sub-MRP0144" "sub-MRP0148"
## [151] "sub-MRP0152" "sub-MRP0156" "sub-MRP0157" "sub-MRP0162" "sub-MRP0165"
## [156] "sub-ZHH0001" "sub-ZHH0002" "sub-ZHH0003" "sub-ZHH0004" "sub-ZHH0005"
## [161] "sub-ZHH0006" "sub-ZHH0007" "sub-ZHH0008" "sub-ZHH0009" "sub-ZHH0010"
## [166] "sub-ZHH0011" "sub-ZHH0014" "sub-ZHH0017" "sub-ZHH0019" "sub-ZHH0020"
## [171] "sub-ZHH0022" "sub-ZHH0023" "sub-ZHH0024" "sub-ZHH0025" "sub-ZHH0029"
## [176] "sub-ZHH0033" "sub-ZHH0037" "sub-ZHH0040" "sub-ZHH0044" "sub-ZHH0045"
## [181] "sub-ZHH0047" "sub-ZHH0048" "sub-ZHH0052" "sub-ZHH0053" "sub-ZHH0054"
## [186] "sub-ZHH0056" "sub-ZHH0057" "sub-ZHP0060" "sub-ZHP0063" "sub-ZHP0065"
## [191] "sub-ZHP0067" "sub-ZHP0068" "sub-ZHP0069" "sub-ZHP0072" "sub-ZHP0073"
## [196] "sub-ZHP0074" "sub-ZHP0085" "sub-ZHP0087" "sub-ZHP0088" "sub-ZHP0093"
## [201] "sub-ZHP0094" "sub-ZHP0095" "sub-ZHP0097" "sub-ZHP0098" "sub-ZHP0100"
## [206] "sub-ZHP0103" "sub-ZHP0105" "sub-ZHP0107" "sub-ZHP0108" "sub-ZHP0114"
## [211] "sub-ZHP0115" "sub-ZHP0116" "sub-ZHP0117" "sub-ZHP0118" "sub-ZHP0119"
## [216] "sub-ZHP0120" "sub-ZHP0122" "sub-ZHP0124" "sub-ZHP0129" "sub-ZHP0130"
## [221] "sub-ZHP0132" "sub-ZHP0133" "sub-ZHP0134" "sub-ZHP0136" "sub-ZHP0137"
## [226] "sub-ZHP0138" "sub-ZHP0139" "sub-ZHP0141" "sub-ZHP0142" "sub-ZHP0143"
## [231] "sub-ZHP0144" "sub-ZHP0145" "sub-ZHP0148" "sub-ZHP0149" "sub-ZHP0152"
## [236] "sub-ZHP0155" "sub-ZHP0162" "sub-ZHP0163" "sub-ZHP0165" "sub-ZHP0166"
## [241] "sub-ZHP0167" "sub-ZHP0168" "sub-ZHP0170"
## kept subjects: those present in both the behavioural and gradient tables
kept.sub <- behav.sub[behav.sub %in% grad.sub] # 187
## subjects about to be dropped for missing behavioural values
## (was `complete.cases(...) == FALSE`; negate directly)
kept.sub[!complete.cases(spins_behav_num[kept.sub,])]
## [1] "sub-CMH0002" "sub-CMH0013" "sub-ZHP0081" "sub-ZHP0083"
## keep only subjects with fully observed behavioural data
kept.sub <- kept.sub[complete.cases(spins_behav_num[kept.sub,])] # 183

## grab the matching data

## behavioural measures for the retained subjects (rows indexed by subject ID)
behav.dat <- spins_behav_num[kept.sub,]
## drop the first (index) column, then key the gradient rows by subject ID
## NOTE(review): if spins_grads_wide is a tibble, `rownames<-` may warn or be
## ignored — confirm it has been coerced to a plain data.frame upstream
spins_grads_wide_org <- spins_grads_wide[,-1]
rownames(spins_grads_wide_org) <- spins_grads_wide$Subject
grad.dat <- spins_grads_wide_org[kept.sub,]

## variables to regress out
## nuisance covariates for the same subjects (sex, age, rest-run motion;
## see the lm() formula in the next chunk)
regout.dat <- var2regout_num[kept.sub,]

## free the large intermediate tables now that the aligned subsets exist
rm(spins_grads_wide, spins_grads_wide_org, lol_spins_behav, spins_behav_num, spins_grads_num)

Regress out the effects

## Regress sex, age at study entry, and rest-run mean framewise displacement
## out of every column of a numeric table; returns the matrix of residuals.
## Fixes two issues in the original: the duplicated formula is factored out,
## and `$residual` (which only worked via partial matching of `$residuals`)
## is replaced by the resid() extractor.
residualize <- function(dat) {
  apply(dat, 2, function(x) {
    resid(lm(x ~ regout.dat$demo_sex + regout.dat$demo_age_study_entry +
               regout.dat$fd_mean_task.rest))
  })
}

## residualized behavioural measures
behav.reg <- residualize(behav.dat)

## residualized gradient values
grad.reg <- residualize(grad.dat)

grab some network colours

## Build the network lookup table: one row per network, ordered by its key,
## with an RGB hex colour. A Subcortical row (black) is appended because it
## is absent from the parcellation file.
networks <- read_delim(
  "../networks.txt",
  delim = "\t", escape_double = FALSE, trim_ws = TRUE
) %>%
  select(NETWORK, NETWORKKEY, RED, GREEN, BLUE, ALPHA) %>%
  distinct() %>%
  add_row(
    NETWORK = "Subcortical", NETWORKKEY = 13,
    RED = 0, GREEN = 0, BLUE = 0, ALPHA = 255
  ) %>%
  mutate(hex = rgb(RED, GREEN, BLUE, maxColorValue = 255)) %>%
  arrange(NETWORKKEY)
## Rows: 718 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (4): LABEL, HEMISPHERE, NETWORK, GLASSERLABELNAME
## dbl (8): INDEX, KEYVALUE, RED, GREEN, BLUE, ALPHA, NETWORKKEY, NETWORKSORTED...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## darken all network colours by 20% for contrast
networks$hex <- darken(networks$hex, 0.2)

# palette preview (kept for reference): compare lighten/darken levels —
# presumably colorspace::swatchplot, confirm before re-enabling
# oi <- networks$hex
# swatchplot(
#   "-40%" = lighten(oi, 0.4),
#   "-20%" = lighten(oi, 0.2),
#   "  0%" = oi,
#   " 20%" =  darken(oi, 0.2),
#   " 25%" =  darken(oi, 0.25),
#   " 30%" =  darken(oi, 0.3),
#   " 35%" =  darken(oi, 0.35),
#   off = c(0, 0)
# )

## print the final network -> colour lookup table
networks
## # A tibble: 13 x 7
##    NETWORK              NETWORKKEY   RED GREEN  BLUE ALPHA hex    
##    <chr>                     <dbl> <dbl> <dbl> <dbl> <dbl> <chr>  
##  1 Visual1                       1     0     0   255   255 #0707CF
##  2 Visual2                       2   100     0   255   255 #5001D0
##  3 Somatomotor                   3     0   255   255   255 #11C7C7
##  4 Cingulo-Opercular             4   153     0   153   255 #7D007D
##  5 Dorsal-Attention              5     0   255     0   255 #10C710
##  6 Language                      6     0   155   155   255 #097A7A
##  7 Frontoparietal                7   255   255     0   255 #C7C70B
##  8 Auditory                      8   250    62   251   255 #D105D2
##  9 Default                       9   255     0     0   255 #CC0303
## 10 Posterior-Multimodal         10   177    89    40   255 #88492D
## 11 Ventral-Multimodal           11   255   157     0   255 #C97B05
## 12 Orbito-Affective             12    65   125     0   168 #336400
## 13 Subcortical                  13     0     0     0   255 #000000

get row and column designs

## match ROIs to networks: build a named lookup vector ROI -> network label
ROI.network.match <- unique(cbind(spins_grads$ROI, spins_grads$Network))
ROI.idx <- setNames(ROI.network.match[, 2], ROI.network.match[, 1])
### match networks with colors: named lookup vector network -> hex colour
net.col.idx <- setNames(networks$hex, networks$NETWORK)

## design matrix for subjects
sub.dx <- spins_dx_org[kept.sub,]

## one colour per diagnostic group
diagnostic.col <- sub.dx$diagnostic_group %>% as.matrix %>% makeNominalData() %>% createColorVectorsByDesign()
## makeNominalData() prefixes level names with "."; strip only that leading
## literal dot. The original pattern "." was an unescaped regex matching ANY
## first character — it worked by accident.
rownames(diagnostic.col$gc) <- sub("^\\.", "", rownames(diagnostic.col$gc))

## design matrix for columns - behavioral
## one row per behavioural measure; "type" is the instrument prefix of the
## column name (text before the first underscore)
behav.dx <- data.frame(
  type = sub("(^[^_]+).*", "\\1", colnames(behav.dat)),
  row.names = colnames(behav.dat)
)

## instrument -> colour lookup
behav.col <- c("scog" = "#F28E2B",
               "np" = "#59A14F",
               "bprs" = "#E15759",
               "qls" = "#B07AA1",
               "sans" = "#FF9888",
               "bsfs" = "#D37295")

## map each measure's instrument prefix to its colour
behav.dx$type.col <- recode(behav.dx$type, !!!behav.col)

## design matrix for columns - gradient
## one row per gradient column, annotated with its gradient number, ROI,
## network, and the colours used in the plots below
grad.dx <- matrix(nrow = ncol(grad.dat), ncol = 4, dimnames = list(colnames(grad.dat), c("gradient", "ROI", "network", "network.col"))) %>% as.data.frame

## gradient id: text before the first "."
grad.dx$gradient <- sub("(^[^.]+).*", "\\1", colnames(grad.dat))
## ROI: text after the first ".". The original pattern "^[^.]+.(*)" only
## worked by accident — "(*)" is an empty capture group and the dot was
## unescaped; escape it and capture the remainder explicitly.
grad.dx$ROI <- sub("^[^.]+\\.(.*)", "\\1", colnames(grad.dat))
grad.dx$network <- recode(grad.dx$ROI, !!!ROI.idx)
grad.dx$network.col <- recode(grad.dx$network, !!!net.col.idx)

## get different alpha for gradients
## grey shade per gradient (darkest = gradient 1)
grad.col.idx <- c("grad1" = "grey30",
                  "grad2" = "grey60",
                  "grad3" = "grey90")
grad.dx$gradient.col <- recode(grad.dx$gradient, !!!grad.col.idx)

## for heatmap: 256-step diverging ramp, red -> white -> blue
heat_ramp_fun <- colorRampPalette(c("red", "white", "blue"))
col.heat <- heat_ramp_fun(256)

Run PLS-C

## PLS correlation between the residualized behavioural and gradient tables
pls.res <- tepPLS(behav.reg, grad.reg, make_design_nominal = TRUE, graphs = FALSE)
## bootstrap ratios, keeping the first 4 latent dimensions
pls.boot <- data4PCCAR::Boot4PLSC(behav.reg, grad.reg, scale1 = TRUE, scale2 = TRUE, nf2keep = 4)
## Registered S3 method overwritten by 'data4PCCAR':
##   method                  from     
##   print.str_colorsOfMusic PTCA4CATA
## Warning in matrix(svd.S$d, nJ, nf2keep, byrow = TRUE): data length [17] is not a
## sub-multiple or multiple of the number of rows [1176]
## NOTE(review): the recycling warning above is raised inside Boot4PLSC —
## confirm it does not affect the reported bootstrap ratios
# ## switch direction for dimension 3 (kept for reference; the sign of a
# ## latent dimension is arbitrary)
# pls.res$TExPosition.Data$fi[,3] <- pls.res$TExPosition.Data$fi[,3]*-1
# pls.res$TExPosition.Data$fj[,3] <- pls.res$TExPosition.Data$fj[,3]*-1
# pls.res$TExPosition.Data$pdq$p[,3] <- pls.res$TExPosition.Data$pdq$p[,3]*-1
# pls.res$TExPosition.Data$pdq$q[,3] <- pls.res$TExPosition.Data$pdq$q[,3]*-1
# pls.res$TExPosition.Data$lx[,3] <- pls.res$TExPosition.Data$lx[,3]*-1
# pls.res$TExPosition.Data$ly[,3] <- pls.res$TExPosition.Data$ly[,3]*-1

## Scree plot
PlotScree(pls.res$TExPosition.Data$eigs)

## Print singular values
pls.res$TExPosition.Data$pdq$Dv
##  [1] 7.7781559 4.7276193 4.0514314 2.9234134 2.4650566 2.3209054 2.2415552
##  [8] 1.8705639 1.6937494 1.5940652 1.4065657 1.2366213 1.1036335 0.9833056
## [15] 0.8857666 0.8095011 0.1207203
## Print eigenvalues
pls.res$TExPosition.Data$eigs
##  [1] 60.49970844 22.35038418 16.41409673  8.54634610  6.07650422  5.38660173
##  [7]  5.02456985  3.49900925  2.86878704  2.54104379  1.97842704  1.52923234
## [13]  1.21800695  0.96688990  0.78458241  0.65529211  0.01457339
## Compare the inertia to the largest possible inertia
## total cross-table effect: sum of squared correlations over every
## behavioural x gradient column pair
sum(cor(behav.dat, grad.dat)^2)
## [1] 164.1464
## ...expressed as a fraction of its maximum (all correlations equal to 1)
sum(cor(behav.dat, grad.dat)^2)/(ncol(behav.dat)*ncol(grad.dat))
## [1] 0.008210605
## [1] 0.008210605

Here, we show that the effect that PLSC decomposes is small to begin with. The total squared correlation between the two tables is 164.15, which accounts for only 0.0082 of the largest possible effect.

Results

Dimension 1

## latent-variable scores plot for dimension 1
lxly.out[[1]]

## per-gradient contribution bar plots
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

## behavioural scores; bars with non-significant bootstrap ratios are greyed
## out (the redundant `== TRUE` comparison is dropped)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,1],
               threshold = 0,
               color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,1], behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")

## heatmap of the cross-correlation matrix that PLSC decomposes
cor.heat <- pls.res$TExPosition.Data$X %>% heatmap(col = col.heat)

Dimension 2

## latent-variable scores plot for dimension 2
lxly.out[[2]]

## per-gradient contribution bar plots
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

## behavioural scores; bars with non-significant bootstrap ratios are greyed
## out (the redundant `== TRUE` comparison is dropped)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,2],
               threshold = 0,
               color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,2], behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")

## rank-1 reconstruction from dimension 1. Note: as.matrix() silently
## ignores an `ncol` argument, so build the column vectors with matrix().
dim1.est <- pls.res$TExPosition.Data$pdq$Dv[1] *
  matrix(pls.res$TExPosition.Data$pdq$p[,1], ncol = 1) %*%
  t(matrix(pls.res$TExPosition.Data$pdq$q[,1], ncol = 1))

## residual correlation structure after removing dimension 1
cor.heat.res1 <- (pls.res$TExPosition.Data$X - dim1.est) %>% heatmap(col = col.heat)

Dimension 3

## latent-variable scores plot for dimension 3
lxly.out[[3]]

## per-gradient contribution bar plots
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

## behavioural scores; bars with non-significant bootstrap ratios are greyed
## out (the redundant `== TRUE` comparison is dropped)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,3],
               threshold = 0,
               color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,3], behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")

## rank-2 reconstruction from dimensions 1-2
dim2.est <- (as.matrix(pls.res$TExPosition.Data$pdq$p[,1:2]) %*% pls.res$TExPosition.Data$pdq$Dd[1:2,1:2] %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1:2])))

## residual correlation structure after removing dimensions 1-2
cor.heat.res2 <- heatmap(pls.res$TExPosition.Data$X - dim2.est, col = col.heat)

Dimension 4

## latent-variable scores plot for dimension 4
lxly.out[[4]]

## per-gradient contribution bar plots
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)

## behavioural scores; bars with non-significant bootstrap ratios are greyed
## out (the redundant `== TRUE` comparison is dropped)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,4],
               threshold = 0,
               color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,4], behav.dx$type.col, "grey90"),
               horizontal = FALSE, main = "Scores - behavioural")

## rank-3 reconstruction from dimensions 1-3
dim3.est <- (as.matrix(pls.res$TExPosition.Data$pdq$p[,1:3]) %*% pls.res$TExPosition.Data$pdq$Dd[1:3,1:3] %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1:3])))

## residual correlation structure after removing dimensions 1-3
cor.heat.res3 <- heatmap(pls.res$TExPosition.Data$X - dim3.est, col = col.heat)

Back into the brain

Dimension 1

## merging atlas and data by 'label'
## merging atlas and data by 'label'

Dimension 2

## merging atlas and data by 'label'
## merging atlas and data by 'label'

Dimension 3

## merging atlas and data by 'label'
## merging atlas and data by 'label'

Dimension 4

## merging atlas and data by 'label'
## merging atlas and data by 'label'

Fancy figures

3D plot of the gradients

Dimension 1

We need to interpret the arrows with caution, because only the direction and the magnitude are meaningful, not the end point.

Dimension 2

We need to interpret the arrows with caution, because only the direction and the magnitude are meaningful, not the end point.

Dimension 3

We need to interpret the arrows with caution, because only the direction and the magnitude are meaningful, not the end point.

Dimension 4

We need to interpret the arrows with caution, because only the direction and the magnitude are meaningful, not the end point.